using Distributions, Optim, Plots, ProgressMeter, Weave
using CSV, Tables, StatsBase, Distributions

# N = 260601 # is full size!
N = 10000

@show N

#Please change this path into your personal folder where the data files were placed according to the readme file explanations.
application_data = CSV.read("folder/data/sample.csv", Tables.matrix)
application_data = application_data[1:N,:]

draw_bootstrap_sample = function(df)
    n, K = size(df)
    s_index = sample(1:n,n)
    return df[s_index,:] 
end

SS = 1000
Kd = 5 # 5 discrete regressors

sigma_vals = [0.1 .25 0.5 1. 2.]
sigma_vals = [1.]
n_sigma = length(sigma_vals)

# b1 is the vector of coefficient estimates on the discrete regressors
b1 = zeros(SS,Kd)
b1_mrv = zeros(SS,Kd)

# b2 is the coefficient on the continuous regressors
b2 = zeros(SS)
b2_mrv = zeros(SS,n_sigma)

# autoregressive coefficient
r_mrv = zeros(SS,n_sigma)

# thresholds
g2_mrv = zeros(SS,n_sigma)
g4_mrv = zeros(SS,n_sigma)

J = 4 # (1) bad and very bad; (2) fair; (3) good; (4) very good
k = 3 # see equation (25) in the resubmission from July 2021
            
#     child  married unemp  retired other 
β1 = [-0.030 0.139   -0.188 -0.132  -0.370]
β2 = [0.049] # coef on log income

# Generate data from a DGP where lagged dependent var is not binarized
ρ = [0.5  0.733 0.9]    # rho_1 = 0, ρ[1] = rho2, coef on lag Y == 2, ρ[2] = rho3, coef on lag Y == 3
ρ = [0.0  0.733 0.733]    # rho_1 = 0, ρ[1] = rho2, coef on lag Y == 2, ρ[2] = rho3, coef on lag Y == 3

## We consider deviations of the type...
dev = 0.1 #substitute by [0 0.1 0.2 0.3 0.4]
ρ[1] = dev
ρ[3] = ρ[2] + dev

## set other auto-regressive coefficients

γ2 = [-3.275]
γ3 = [0.0]
γ4 = [3.326]

for s in 1:SS

    df_s = draw_bootstrap_sample(application_data)

    Y0 = df_s[:,2]
    D0 = ifelse.(Y0 .>= 3.,1,0)

    Xc_1 = df_s[:,3] #income, child, married, unemp, retired, other
    Xd_1 = df_s[:,[6;  9; 12; 15; 18]] #income, child, married, unemp, retired, other

    Xc_2 = df_s[:,4]
    Xd_2 = df_s[:,[7; 10; 13; 16; 19]]

    Xc_3 = df_s[:,5]
    Xd_3 = df_s[:,[8; 11; 14; 17; 20]]
    
    n = length(Y0)
    
    dA = Normal(0,1)
    A = rand(dA,n)

    dU = Logistic(0,1)
    U1 = rand(dU,n)
    U2 = rand(dU,n)
    U3 = rand(dU,n)

    Y1_star = Xd_1*β1' + Xc_1.*β2 + U1 + A + (Y0.==2).*ρ[1] + (Y0.==3).*ρ[2] +(Y0.==4).*ρ[3] #D0.*ρ
    Y1 = ones(n)
    Y1 = Y1 .+ ifelse.(Y1_star.> γ2,1,0)
    Y1 = Y1 .+ ifelse.(Y1_star.> γ3,1,0)
    Y1 = Y1 .+ ifelse.(Y1_star.> γ4,1,0)
    D1 = ifelse.(Y1_star.> γ3,1,0)

    Y2_star = Xd_2*β1' + Xc_1.*β2 + U2 + A + (Y1.==2).*ρ[1] + (Y1.==3).*ρ[2] +(Y1.==4).*ρ[3] #D0.*ρ
    Y2 = ones(n)
    Y2 = Y2 .+ ifelse.(Y2_star.> γ2,1,0)
    Y2 = Y2 .+ ifelse.(Y2_star.> γ3,1,0)
    Y2 = Y2 .+ ifelse.(Y2_star.> γ4,1,0)
    D2 = ifelse.(Y2_star.> γ3,1,0)

    Y3_star = Xd_3*β1' + Xc_1.*β2 + U3 + A + (Y2.==2).*ρ[1] + (Y2.==3).*ρ[2] +(Y2.==4).*ρ[3] #D0.*ρ
    Y3 = ones(n)
    Y3 = Y3 .+ ifelse.(Y3_star.> γ2,1,0)
    Y3 = Y3 .+ ifelse.(Y3_star.> γ3,1,0)
    Y3 = Y3 .+ ifelse.(Y3_star.> γ4,1,0)
    D3 = ifelse.(Y3_star.> γ3,1,0)

    ## Glue the discrete and continous regressors
    X_1 = [Xd_1 Xc_1]
    X_2 = [Xd_2 Xc_2]
    X_3 = [Xd_3 Xc_3]

    ## Effective change in X (middle period)
    DX = X_2 - X_1
    # definition of switcher for binarized HK / binarized FEOL
    S = ifelse.(D1 .+ D2 .== 1,1,0)

    ## Binary indicator that is 1 if discrete Xs do not change from period 2 to 3.
    fix_Xd = minimum(Xd_2 .== Xd_3, dims = 2)

    DX_f = DX[fix_Xd[:,1],:]
    Xc_3_f = Xc_3[fix_Xd[:,1]]
    Xc_2_f = Xc_2[fix_Xd[:,1]]
    D1_f = D1[fix_Xd[:,1]]
    D2_f = D2[fix_Xd[:,1]]
    D3_f = D3[fix_Xd[:,1]]
    D0_f = D0[fix_Xd[:,1]]
    S_f = S[fix_Xd[:,1]]
    Y2_f = Y2[fix_Xd[:,1]]
    Y3_f = Y3[fix_Xd[:,1]]

    function ll_mrv(brc, sigma)

        bb1 = brc[1:Kd]
        bb2 = brc[Kd+1]
        r = brc[Kd+2]
        c = [0.0 brc[Kd+3] 0.0 brc[Kd+4]]  #first one is dummy, gamma2, gamm3, gamm4

        gaus_kernel = Normal(0,1)
        Xc_weight = pdf.(gaus_kernel,(Xc_3_f - Xc_2_f)./sigma)

        rval = 0.0
        for j in 2:3
            for l in 3:4

                # D0 is set
                # D1 is set

                # Compute D2 
                D2l = ifelse.(Y2_f .>= l,1,0)
                D2j = ifelse.(Y2_f .>= j,1,0)
                D2jl = ifelse.(D1_f .== 1, D2j, D2l)
                
                # Compute D3
                D3j = ifelse.(Y3_f .>= j,1,0)
                D3l = ifelse.(Y3_f .>= l,1,0)
                D3jl = ifelse.(D1_f .== 1, D3l, D3j)

                Smrv = ifelse.(D1_f .== D2jl,0,1)

                Zjl = hcat(-DX_f,D0_f .- D3jl, D3jl, 1.0 .- D3jl)
                thetajl = vcat(bb1,bb2,r,c[j],c[l])
                index = Zjl * thetajl

                rval = rval -sum(Xc_weight .* Smrv .* (D1_f .* log.(cdf(dU,index)) + (1 .- D1_f) .* log.(1 .-cdf(dU,index))))

            end
        end

        return rval

    end

    ## Compute the estimator for every value of the bandwidth parameter.
    for sig_index in 1:n_sigma
        func_mrv = TwiceDifferentiable(brc -> ll_mrv(brc, sigma_vals[sig_index]), ones(9); autodiff=:forward)
        yay_mrv = optimize(func_mrv, [β1';β2;ρ[2];γ2;γ4])
        b1_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd]
        b2_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+1]
        r_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+2]
        g2_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+3]
        g4_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+4]
    end
end

@show N
@show SS
@show sigma_vals
@show dev

@show β1[5]
@show mean(b1_mrv, dims=1)
@show std(b1_mrv, dims=1)


@show β2
@show mean(b2_mrv, dims = 1)
@show std(b2_mrv, dims = 1)

@show ρ
@show mean(r_mrv, dims = 1)
@show std(r_mrv, dims = 1)

@show [γ2 γ4]
@show mean(g2_mrv, dims = 1)
@show std(g2_mrv, dims = 1)
@show mean(g4_mrv, dims = 1);
@show std(g4_mrv, dims = 1);
